# Project setup: working directory, packages, print options, shared helpers.
# NOTE(review): this absolute path is machine-specific; every relative path
# used later in the script ("code/...", "final-data/...") is resolved against
# it, so the script only runs as-is where this directory exists.
setwd("d:/PORES Dropbox/Stephen Pettigrew/ranked-choice-voting/replication files/")

# library() stops right here if the package is missing; require() would only
# warn and return FALSE, deferring the failure to the first tidyverse call.
library(tidyverse)
# A large scipen penalty makes R prefer fixed over scientific notation.
options(scipen = 999)
source("code/geography-colors.R")


# Collapse ballot-level error flags into per-group mis-mark rates.
#
# Args:
#   x: a data frame (optionally grouped with group_by()) containing the
#      columns any.error, overrank, overvote and skip. Each is averaged with
#      mean(), so they are presumably logical / 0-1 flags -- TODO confirm.
#   include.count: if TRUE, keep the per-group row count in column `count`;
#      if FALSE (the default), drop it.
#
# Returns:
#   An ungrouped summary with one row per group of `x` and the columns
#   error.rate, overrank.rate, overvote.rate and skips (plus `count` when
#   requested). mean() is called without na.rm, so a missing flag makes the
#   corresponding group rate NA.
summarize.rates <- function(x, include.count = FALSE) {
  rates <- summarize(
    x,
    count = n(),
    error.rate = mean(any.error),
    overrank.rate = mean(overrank),
    overvote.rate = mean(overvote),
    skips = mean(skip),
    .groups = "drop"
  )
  if (!include.count) {
    rates$count <- NULL
  }
  rates
}


# Load the combined data object. Only its `offices` and `errors` elements are
# used in this script.
raw <- readRDS("final-data/all-errors-and-votes.RDS")

# Lookup table of the ranked-choice contests on Alaska's 2022-11-08 ballot.
#   office  : display label. Contests 3, 4 and 5 become "US Senate",
#             "US House" and "Governor"; every other contest becomes
#             "State <first word of its original office name>".
#   office2 : grouping label. Identical to `office` when it contains "State",
#             otherwise "Statewide".
# NOTE(review): the office.id values 3/4/5 are hard-coded and therefore tied to
# this particular data file -- confirm if the data are regenerated.
offices <- raw$offices %>%
  filter(geography == "AK",
         date == "2022-11-08",
         rcv) %>%
  select(office.id, office) %>%
  mutate(
    # vapply() guarantees a character vector of the same length as `office`;
    # sapply() silently changes its return type on empty or ragged input.
    office = paste("State",
                   vapply(strsplit(office, " "), "[", character(1), 1)),
    office = ifelse(office.id == 3, "US Senate", office),
    office = ifelse(office.id == 4, "US House", office),
    office = ifelse(office.id == 5, "Governor", office),

    office2 = ifelse(grepl("State", office), office, "Statewide")
  )

# Error records for the contests grouped as "Statewide" in `offices`
# (US Senate, US House and Governor), Alaska, 2022-11-08.
sw <- filter(
  raw$errors,
  geography == "AK",
  date == "2022-11-08",
  office.id %in% offices$office.id[offices$office2 == "Statewide"]
)


# One row per ballot, classifying how many of its two state legislative
# contests (State House, State Senate) the voter marked.
#   - A contest counts as "voted" when its bubble.pattern is non-empty.
#   - Ballots without a non-missing value for both chambers are dropped.
#   - st.leg is a factor whose labels double as the plot legend entries (the
#     embedded newlines are part of the labels).
leg <- raw$errors %>%
  filter(geography == "AK",
         date == "2022-11-08",
         office.id %in% offices$office.id[grepl("State ", offices$office2)]) %>%
  merge(offices, by = "office.id", all.x = TRUE) %>%
  mutate(voted = bubble.pattern != "",
         # "State House" -> "house", "State Senate" -> "senate"
         office = tolower(gsub("State ", "", office))) %>%
  select(ballot.id, office, voted) %>%
  # one logical column per chamber
  pivot_wider(names_from = office, values_from = voted) %>%
  filter(!is.na(house), !is.na(senate)) %>%

  # house + senate is the number of chambers voted in: 2 = both, 1 = exactly
  # one, 0 = neither. `levels` fixes which value receives which label.
  mutate(st.leg = factor(house + senate,
                         levels = c(2, 1, 0),
                         labels = c("\nBoth State House\n& State Senate\n",
                                    "\nEither State House\nor State Senate\n",
                                    "Neither"))) %>%
  select(ballot.id, st.leg)




# Statewide-contest records joined to each ballot's state-legislative
# classification. merge() without all.x/all.y keeps only ballots present in
# both tables. Records with an empty bubble.pattern are removed, and any.error
# flags a record showing at least one of the three mis-mark types.
ak.tmp <- merge(x = sw, y = leg, by = "ballot.id") %>%
  filter(bubble.pattern != "") %>%
  mutate(any.error = overvote | overrank | skip)



# Mis-mark rates by statewide contest and state-legislative participation, in
# long format: one row per (office, st.leg, error type).
#   error        : raw rate name as produced by summarize.rates()
#   error2       : display label for the facet titles, in plotting order
#   pct, count   : rate and number of records in the cell
#   se           : binomial standard error of pct
#   lower, upper : normal-approximation 95% interval, pct +/- 1.96 * se
#                  (not clipped, so `lower` can fall below 0)
res <- ak.tmp %>%
  group_by(office.id, st.leg) %>%
  summarize.rates(include.count = TRUE) %>%
  ungroup() %>%
  merge(offices %>%
          select(office.id, office),
        by = "office.id") %>%
  pivot_longer(cols = -c(office.id, office, st.leg, count),
               names_to = "error",
               values_to = "pct") %>%
  mutate(error2 = factor(error,
                         levels = c("error.rate",
                                    "skips",
                                    "overrank.rate",
                                    "overvote.rate"),
                         labels = c("Mismark of any type",
                                    "Front or interior skip",
                                    "Overranked a candidate",
                                    "Overvoted a ranking")),

         se = sqrt(pct * (1 - pct) / count),
         lower = pct + qnorm(.025) * se,
         upper = pct + qnorm(.975) * se,

         # `levels`, not `labels`: this only sets the display order. With
         # `labels` alone, factor() takes the levels in alphabetical order
         # (Governor, US House, US Senate) and renames them positionally,
         # which swaps the Governor and US Senate results.
         office = factor(office,
                         levels = c("US Senate", "US House", "Governor")))





# Figure: mis-mark rate in each statewide contest (x axis), with one point and
# 95% interval per state-legislative participation group (colour and shape),
# and one panel per mis-mark type, each with its own y scale.
# NOTE(review): the y axis is limited to start at 0 while `lower` is not
# clipped; confirm no interval has a negative lower bound, since it would lie
# outside the scale limits.
ggplot(res, aes(x = office, y = pct, color = st.leg, shape = st.leg)) +
  geom_point(position = position_dodge(width = .3)) +
  geom_errorbar(
    aes(ymin = lower, ymax = upper),
    width = 0,
    position = position_dodge(width = .3)
  ) +
  scale_y_continuous(
    name = "Percent of voters who mismarked their ballot",
    labels = scales::percent_format(accuracy = .1),
    limits = c(0, NA)
  ) +
  scale_color_discrete(name = "Among those\nwho voted for") +
  scale_shape_discrete(name = "Among those\nwho voted for") +
  labs(x = "") +
  facet_wrap(~error2, scales = "free_y") +
  theme_bw()






## Appendix tables

# Pooled two-proportion z statistic for H0: p1 == p2, where p1 and p2 are
# observed proportions based on n1 and n2 observations. Positive values mean
# p1 > p2. Vectorised over all arguments.
pooled.z <- function(p1, n1, p2, n2) {
  p <- (p1 * n1 + p2 * n2) / (n1 + n2)
  (p1 - p2) / sqrt(p * (1 - p) * (1 / n1 + 1 / n2))
}

# Format z statistics to two decimals and append one star for each two-sided
# significance level reached (p < 0.05, p < 0.01, p < 0.001). Stars are based
# on |z| so they agree with the two-sided p-values quoted in the table note,
# whatever the sign of the difference. A missing z (for example 0/0 when both
# rates are zero) gets no stars.
z.with.stars <- function(z) {
  n.stars <- (abs(z) >= qnorm(.975)) +
    (abs(z) >= qnorm(.995)) +
    (abs(z) >= qnorm(.9995))
  n.stars[is.na(n.stars)] <- 0
  paste0(sprintf("%.2f", z), strrep("*", n.stars))
}

# Appendix table body: one row per (mis-mark type, office) with the rate and n
# of each participation group ("neither", "both", "either") and the z
# statistic comparing "neither" against each of the other two groups.
tmp <- res %>%
  select(office.id, st.leg, office, error, count, pct) %>%
  mutate(rate = sprintf("%.2f%% (%s)",
                        pct * 100,
                        prettyNum(count, big.mark = ",")),
         # Reduce the multi-line legend label to its first word, lower-cased
         # ("both", "either", "neither"), for use as a column-name suffix.
         st.leg = vapply(strsplit(as.character(st.leg), " "),
                         "[", character(1), 1),
         st.leg = tolower(gsub("\n", "", st.leg))) %>%
  # yields count_*, pct_* and rate_* columns for each group
  pivot_wider(names_from = st.leg, values_from = c(count, pct, rate)) %>%

  mutate(z.either = z.with.stars(pooled.z(pct_neither, count_neither,
                                          pct_either, count_either)),
         z.both = z.with.stars(pooled.z(pct_neither, count_neither,
                                        pct_both, count_both))) %>%

  # sort on the raw rate names and the office factor order before relabelling
  arrange(error, office) %>%

  mutate(error = gsub("skips", "Skip", error, fixed = TRUE),
         error = gsub("error.rate", "Any error", error, fixed = TRUE),
         error = gsub("overrank.rate", "Overrank", error, fixed = TRUE),
         error = gsub("overvote.rate", "Overvote", error, fixed = TRUE),
         office = as.character(office)) %>%
  select(error, office, rate_neither,
         rate_both, z.both,
         rate_either, z.either)



# Appendix table (a): voters who voted in neither state legislative race
# versus those who voted in both. The stargazer() result is used below as a
# character vector holding one line of LaTeX per element.
text1 <- tmp %>%
  select(-c(rate_either, z.either)) %>%
  stargazer::stargazer(summary = FALSE,
                       label = "z-scores-ak-roll-off-a",
                       title = "Error rates of voters who voted in neither state legislative race versus those who voted in both",
                       no.space = TRUE,
                       rownames = FALSE)

# Hand-written three-row header that replaces the auto-generated column names.
insert <- c("         &        & \\multicolumn{2}{c}{Mis-mark rate (n voters) of those who...}   &            \\\\ ",
            "Mis-mark &        & Didn't vote for        & Voted for both                         & Z-score of \\\\ ",
            "type     & Office & State House or Senate  & State House and Senate                 & difference \\\\ ")

# Significance legend. It ends with a LaTeX row terminator ("\\\\" in R source
# is "\\" in the .tex output), like the header rows above.
note <- "\\textit{Note:}  & \\multicolumn{4}{l}{$^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{***}$p$<$0.001} \\\\ "

# Locate the auto-generated header row. The splice below is only meaningful
# when exactly one line matches, so fail loudly otherwise.
line <- grep("error & office", text1, fixed = TRUE)
if (length(line) != 1) {
  stop("expected exactly one header row in the stargazer output, found ",
       length(line), call. = FALSE)
}

# Replace the header row with `insert` and put `note` just before the last two
# lines of the stargazer output.
tex1 <- c(text1[1:(line - 1)],
          insert,
          text1[(line + 1):(length(text1) - 2)],
          note,
          text1[(length(text1) - 1):length(text1)])

# Swap the all-centred column spec for left/centre columns with rules.
tex1 <- gsub("ccccc", "ll|cc|l", tex1, fixed = TRUE)







# Appendix table (b): voters who voted in neither state legislative race
# versus those who voted in exactly one. The stargazer() result is used below
# as a character vector holding one line of LaTeX per element.
text2 <- tmp %>%
  select(-c(rate_both, z.both)) %>%
  stargazer::stargazer(summary = FALSE,
                       label = "z-scores-ak-roll-off-b",
                       title = "Error rates of voters who voted in neither state legislative race versus those who voted in one",
                       no.space = TRUE,
                       rownames = FALSE)

# Hand-written three-row header that replaces the auto-generated column names.
insert <- c("         &        & \\multicolumn{2}{c}{Mis-mark rate (n voters) of those who...}   &            \\\\ ",
            "Mis-mark &        & Didn't vote for        & Voted for either                       & Z-score of \\\\ ",
            "type     & Office & State House or Senate  & State House or Senate                  & difference \\\\ ")

# Significance legend. It ends with a LaTeX row terminator ("\\\\" in R source
# is "\\" in the .tex output), like the header rows above.
note <- "\\textit{Note:}  & \\multicolumn{4}{l}{$^{*}$p$<$0.05; $^{**}$p$<$0.01; $^{***}$p$<$0.001} \\\\ "

# Locate the auto-generated header row. The splice below is only meaningful
# when exactly one line matches, so fail loudly otherwise.
line <- grep("error & office", text2, fixed = TRUE)
if (length(line) != 1) {
  stop("expected exactly one header row in the stargazer output, found ",
       length(line), call. = FALSE)
}

# Replace the header row with `insert` and put `note` just before the last two
# lines of the stargazer output.
tex2 <- c(text2[1:(line - 1)],
          insert,
          text2[(line + 1):(length(text2) - 2)],
          note,
          text2[(length(text2) - 1):length(text2)])

# Swap the all-centred column spec for left/centre columns with rules.
tex2 <- gsub("ccccc", "ll|cc|l", tex2, fixed = TRUE)
